Component: K-NEAREST NEIGHBOUR (KNN) CLASSIFICATION
import numpy as ny
import pandas as ps
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import time
# Record the wall-clock start time of the run.
# NOTE(review): no matching stop/elapsed computation is visible in this section — confirm it exists later.
tic=time.time()
# Colab-only: mount Google Drive so the dataset path under /content/drive resolves.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
# Load the credit-card transactions CSV from the mounted Drive into a DataFrame.
dataset_csv = "/content/drive/MyDrive/Project - ITE2013 - Big Data - Credit Card Fraud Detection/dataset/creditcard.csv"
cred = ps.read_csv(dataset_csv)
# Notebook display: preview the first five rows.
cred.head()
Time | V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 | V20 | V21 | V22 | V23 | V24 | V25 | V26 | V27 | V28 | Amount | Class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | -1.359807 | -0.072781 | 2.536347 | 1.378155 | -0.338321 | 0.462388 | 0.239599 | 0.098698 | 0.363787 | 0.090794 | -0.551600 | -0.617801 | -0.991390 | -0.311169 | 1.468177 | -0.470401 | 0.207971 | 0.025791 | 0.403993 | 0.251412 | -0.018307 | 0.277838 | -0.110474 | 0.066928 | 0.128539 | -0.189115 | 0.133558 | -0.021053 | 149.62 | 0 |
1 | 0.0 | 1.191857 | 0.266151 | 0.166480 | 0.448154 | 0.060018 | -0.082361 | -0.078803 | 0.085102 | -0.255425 | -0.166974 | 1.612727 | 1.065235 | 0.489095 | -0.143772 | 0.635558 | 0.463917 | -0.114805 | -0.183361 | -0.145783 | -0.069083 | -0.225775 | -0.638672 | 0.101288 | -0.339846 | 0.167170 | 0.125895 | -0.008983 | 0.014724 | 2.69 | 0 |
2 | 1.0 | -1.358354 | -1.340163 | 1.773209 | 0.379780 | -0.503198 | 1.800499 | 0.791461 | 0.247676 | -1.514654 | 0.207643 | 0.624501 | 0.066084 | 0.717293 | -0.165946 | 2.345865 | -2.890083 | 1.109969 | -0.121359 | -2.261857 | 0.524980 | 0.247998 | 0.771679 | 0.909412 | -0.689281 | -0.327642 | -0.139097 | -0.055353 | -0.059752 | 378.66 | 0 |
3 | 1.0 | -0.966272 | -0.185226 | 1.792993 | -0.863291 | -0.010309 | 1.247203 | 0.237609 | 0.377436 | -1.387024 | -0.054952 | -0.226487 | 0.178228 | 0.507757 | -0.287924 | -0.631418 | -1.059647 | -0.684093 | 1.965775 | -1.232622 | -0.208038 | -0.108300 | 0.005274 | -0.190321 | -1.175575 | 0.647376 | -0.221929 | 0.062723 | 0.061458 | 123.50 | 0 |
4 | 2.0 | -1.158233 | 0.877737 | 1.548718 | 0.403034 | -0.407193 | 0.095921 | 0.592941 | -0.270533 | 0.817739 | 0.753074 | -0.822843 | 0.538196 | 1.345852 | -1.119670 | 0.175121 | -0.451449 | -0.237033 | -0.038195 | 0.803487 | 0.408542 | -0.009431 | 0.798278 | -0.137458 | 0.141267 | -0.206010 | 0.502292 | 0.219422 | 0.215153 | 69.99 | 0 |
# Shuffle every row (frac=1 keeps all rows; only their order changes).
# A fixed random_state makes the shuffled order, and therefore everything
# derived from it further down, the same on every run.
cred = cred.sample(frac=1, random_state=42)
# Notebook display: show the shuffled frame.
cred
Time | V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 | V20 | V21 | V22 | V23 | V24 | V25 | V26 | V27 | V28 | Amount | Class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
178103 | 123475.0 | -5.907829 | 4.495036 | -3.682879 | -2.626879 | -0.198306 | 3.447705 | -2.558034 | 3.446840 | 2.201571 | 2.332888 | -0.730184 | 1.157248 | 0.159355 | 0.692754 | 0.724211 | 0.482909 | 0.141694 | -0.297332 | -0.606919 | 0.645331 | -0.095417 | -0.823894 | 0.512631 | 0.608003 | 0.364096 | -0.373512 | -1.393714 | -0.222022 | 1.77 | 0 |
261925 | 160243.0 | -2.783865 | 1.596824 | -2.084844 | 2.512986 | -1.446749 | -0.828496 | -0.732262 | -0.203329 | -0.347046 | -2.162061 | 1.966123 | -3.127456 | 0.506574 | -5.926131 | 0.931091 | -2.499307 | -3.712752 | -1.142133 | 0.626241 | -0.515001 | 0.203563 | 0.293268 | 0.199568 | 0.146868 | 0.163602 | -0.624085 | -1.333100 | 0.428634 | 156.00 | 1 |
194951 | 130837.0 | -1.150428 | -0.791315 | 0.565064 | -2.139782 | 1.352664 | -0.059039 | 0.260391 | 0.125684 | -1.596782 | -0.091853 | -0.230739 | -0.685829 | -0.270623 | -0.019917 | -1.300188 | 1.093228 | -0.182027 | -0.804988 | 1.560827 | 0.533215 | 0.057807 | -0.366581 | -0.154412 | -0.313439 | 0.914807 | -0.196511 | -0.076915 | 0.047318 | 90.70 | 0 |
197961 | 132257.0 | 2.086916 | -0.842936 | -3.973379 | -1.698370 | 2.540840 | 2.677246 | -0.276120 | 0.497714 | -0.974467 | 0.342742 | 0.364130 | -0.759274 | -0.257962 | -0.826524 | -0.072530 | 0.534962 | 1.416166 | -1.382954 | 0.482206 | 0.206675 | 0.296118 | 0.673546 | -0.137164 | 0.675475 | 0.507282 | 0.122100 | -0.052114 | -0.047698 | 91.40 | 0 |
111956 | 72448.0 | -0.795972 | -0.337945 | 1.868059 | 1.050754 | -1.120475 | 0.087213 | -1.124932 | 0.708030 | -1.023988 | 0.285413 | -0.667877 | 0.441433 | 0.751321 | -0.250259 | 0.470302 | -2.091935 | 0.891401 | 1.006323 | -0.498712 | -0.275143 | -0.172551 | -0.152472 | 0.226367 | 0.365930 | -0.484788 | -0.311035 | 0.039018 | -0.114734 | 40.00 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
160326 | 113237.0 | 1.624383 | -1.005744 | 0.412692 | 1.615982 | -1.085810 | 0.945635 | -1.166649 | 0.368163 | 1.954861 | -0.125110 | -1.596324 | 0.606588 | 0.206027 | -0.902252 | -0.464850 | 0.204302 | -0.410422 | 0.368665 | -0.506987 | 0.040387 | 0.247959 | 0.817079 | 0.063272 | 0.618675 | -0.228784 | -0.559651 | 0.092650 | 0.011391 | 139.00 | 0 |
46718 | 42899.0 | -1.213732 | 1.688825 | -0.755056 | -0.912420 | 3.124243 | 3.518855 | -0.720185 | -1.732199 | -1.137526 | -1.860717 | 0.036458 | -0.395282 | -0.530655 | -0.915168 | 0.640247 | 1.222133 | 0.340463 | 1.171805 | -0.883744 | 0.655440 | -1.324803 | -0.018077 | -0.477582 | 0.836884 | 0.728877 | -0.413261 | 0.029606 | 0.135440 | 1.00 | 0 |
59846 | 49032.0 | -2.032651 | 0.409651 | 0.736766 | -1.862659 | 1.526845 | 0.499899 | -0.262421 | -2.474291 | -1.007222 | -1.590705 | 0.276773 | 1.103023 | 0.890970 | 0.441409 | -0.650635 | 1.128877 | -1.287067 | -0.204933 | -1.089240 | 0.400198 | -1.434896 | 0.022223 | 0.391344 | -1.322870 | 0.684427 | 0.550036 | 0.009533 | -0.036957 | 26.22 | 0 |
272256 | 164988.0 | 1.643815 | -2.152240 | 0.613898 | 0.277621 | -2.363581 | 0.597806 | -1.773400 | 0.319186 | 1.632011 | 0.397138 | -2.235965 | 0.090129 | -0.397398 | -1.318790 | -1.546596 | -1.557987 | 0.357367 | 1.408143 | -0.528242 | -0.273783 | -0.194241 | 0.022026 | 0.081441 | -0.068532 | -0.576502 | 0.730318 | 0.027644 | -0.001285 | 200.00 | 0 |
19645 | 30431.0 | -1.361328 | 1.332937 | 0.419559 | 1.178256 | -0.808321 | 1.358962 | -1.425060 | 1.802502 | -0.007368 | -0.564927 | -0.850215 | 0.472257 | -0.984551 | 0.798282 | -0.578186 | -0.007858 | 0.449377 | 0.657700 | 1.406890 | -0.445492 | 0.127248 | 0.136341 | 0.079341 | -1.183486 | -0.833094 | -0.389425 | -0.352892 | -0.009522 | 0.76 | 0 |
284807 rows × 31 columns
# Build the model inputs: "Class" is removed because it is the target label
# (extracted separately below), and "Time" is removed because, per the original
# note, the other algorithm models in this project did not use it either.
features = cred.drop(columns=["Time", "Class"])
# Notebook display: inspect the remaining feature columns.
features
V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 | V20 | V21 | V22 | V23 | V24 | V25 | V26 | V27 | V28 | Amount | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
178103 | -5.907829 | 4.495036 | -3.682879 | -2.626879 | -0.198306 | 3.447705 | -2.558034 | 3.446840 | 2.201571 | 2.332888 | -0.730184 | 1.157248 | 0.159355 | 0.692754 | 0.724211 | 0.482909 | 0.141694 | -0.297332 | -0.606919 | 0.645331 | -0.095417 | -0.823894 | 0.512631 | 0.608003 | 0.364096 | -0.373512 | -1.393714 | -0.222022 | 1.77 |
261925 | -2.783865 | 1.596824 | -2.084844 | 2.512986 | -1.446749 | -0.828496 | -0.732262 | -0.203329 | -0.347046 | -2.162061 | 1.966123 | -3.127456 | 0.506574 | -5.926131 | 0.931091 | -2.499307 | -3.712752 | -1.142133 | 0.626241 | -0.515001 | 0.203563 | 0.293268 | 0.199568 | 0.146868 | 0.163602 | -0.624085 | -1.333100 | 0.428634 | 156.00 |
194951 | -1.150428 | -0.791315 | 0.565064 | -2.139782 | 1.352664 | -0.059039 | 0.260391 | 0.125684 | -1.596782 | -0.091853 | -0.230739 | -0.685829 | -0.270623 | -0.019917 | -1.300188 | 1.093228 | -0.182027 | -0.804988 | 1.560827 | 0.533215 | 0.057807 | -0.366581 | -0.154412 | -0.313439 | 0.914807 | -0.196511 | -0.076915 | 0.047318 | 90.70 |
197961 | 2.086916 | -0.842936 | -3.973379 | -1.698370 | 2.540840 | 2.677246 | -0.276120 | 0.497714 | -0.974467 | 0.342742 | 0.364130 | -0.759274 | -0.257962 | -0.826524 | -0.072530 | 0.534962 | 1.416166 | -1.382954 | 0.482206 | 0.206675 | 0.296118 | 0.673546 | -0.137164 | 0.675475 | 0.507282 | 0.122100 | -0.052114 | -0.047698 | 91.40 |
111956 | -0.795972 | -0.337945 | 1.868059 | 1.050754 | -1.120475 | 0.087213 | -1.124932 | 0.708030 | -1.023988 | 0.285413 | -0.667877 | 0.441433 | 0.751321 | -0.250259 | 0.470302 | -2.091935 | 0.891401 | 1.006323 | -0.498712 | -0.275143 | -0.172551 | -0.152472 | 0.226367 | 0.365930 | -0.484788 | -0.311035 | 0.039018 | -0.114734 | 40.00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
160326 | 1.624383 | -1.005744 | 0.412692 | 1.615982 | -1.085810 | 0.945635 | -1.166649 | 0.368163 | 1.954861 | -0.125110 | -1.596324 | 0.606588 | 0.206027 | -0.902252 | -0.464850 | 0.204302 | -0.410422 | 0.368665 | -0.506987 | 0.040387 | 0.247959 | 0.817079 | 0.063272 | 0.618675 | -0.228784 | -0.559651 | 0.092650 | 0.011391 | 139.00 |
46718 | -1.213732 | 1.688825 | -0.755056 | -0.912420 | 3.124243 | 3.518855 | -0.720185 | -1.732199 | -1.137526 | -1.860717 | 0.036458 | -0.395282 | -0.530655 | -0.915168 | 0.640247 | 1.222133 | 0.340463 | 1.171805 | -0.883744 | 0.655440 | -1.324803 | -0.018077 | -0.477582 | 0.836884 | 0.728877 | -0.413261 | 0.029606 | 0.135440 | 1.00 |
59846 | -2.032651 | 0.409651 | 0.736766 | -1.862659 | 1.526845 | 0.499899 | -0.262421 | -2.474291 | -1.007222 | -1.590705 | 0.276773 | 1.103023 | 0.890970 | 0.441409 | -0.650635 | 1.128877 | -1.287067 | -0.204933 | -1.089240 | 0.400198 | -1.434896 | 0.022223 | 0.391344 | -1.322870 | 0.684427 | 0.550036 | 0.009533 | -0.036957 | 26.22 |
272256 | 1.643815 | -2.152240 | 0.613898 | 0.277621 | -2.363581 | 0.597806 | -1.773400 | 0.319186 | 1.632011 | 0.397138 | -2.235965 | 0.090129 | -0.397398 | -1.318790 | -1.546596 | -1.557987 | 0.357367 | 1.408143 | -0.528242 | -0.273783 | -0.194241 | 0.022026 | 0.081441 | -0.068532 | -0.576502 | 0.730318 | 0.027644 | -0.001285 | 200.00 |
19645 | -1.361328 | 1.332937 | 0.419559 | 1.178256 | -0.808321 | 1.358962 | -1.425060 | 1.802502 | -0.007368 | -0.564927 | -0.850215 | 0.472257 | -0.984551 | 0.798282 | -0.578186 | -0.007858 | 0.449377 | 0.657700 | 1.406890 | -0.445492 | 0.127248 | 0.136341 | 0.079341 | -1.183486 | -0.833094 | -0.389425 | -0.352892 | -0.009522 | 0.76 |
284807 rows × 29 columns
# Keep the target as a single-column DataFrame; the double brackets select a
# list of columns, so the result stays two-dimensional.
labels = cred[["Class"]]
# Notebook display: inspect the label column.
labels
Class | |
---|---|
178103 | 0 |
261925 | 1 |
194951 | 0 |
197961 | 0 |
111956 | 0 |
... | ... |
160326 | 0 |
46718 | 0 |
59846 | 0 |
272256 | 0 |
19645 | 0 |
284807 rows × 1 columns
# Convert the feature table to a plain NumPy array for scikit-learn.
features_array = features.to_numpy()
# Notebook display: show the resulting array.
features_array
array([[-5.90782921e+00, 4.49503625e+00, -3.68287879e+00, ..., -1.39371378e+00, -2.22022345e-01, 1.77000000e+00], [-2.78386549e+00, 1.59682358e+00, -2.08484399e+00, ..., -1.33309976e+00, 4.28633994e-01, 1.56000000e+02], [-1.15042772e+00, -7.91315138e-01, 5.65063609e-01, ..., -7.69154290e-02, 4.73183520e-02, 9.07000000e+01], ..., [-2.03265075e+00, 4.09650644e-01, 7.36766099e-01, ..., 9.53257400e-03, -3.69568310e-02, 2.62200000e+01], [ 1.64381529e+00, -2.15224043e+00, 6.13898230e-01, ..., 2.76443020e-02, -1.28499500e-03, 2.00000000e+02], [-1.36132758e+00, 1.33293714e+00, 4.19559452e-01, ..., -3.52891758e-01, -9.52247000e-03, 7.60000000e-01]])
# Convert the single-column label table to a NumPy array (one row per sample,
# one column), matching the row order of the feature array.
labels_array = labels.to_numpy()
# Notebook display: show the resulting array.
labels_array
array([[0], [1], [0], ..., [0], [0], [0]])
# Hold out 10% of the rows for testing.
# - stratify keeps the fraud/non-fraud ratio the same in both partitions; the
#   positive class is rare here, so an unstratified split can leave the test
#   set with noticeably too few or too many fraud rows.
# - random_state fixes the partition so the reported metrics can be reproduced.
train_feat, test_feat, train_lab, test_lab = train_test_split(
    features_array,
    labels_array,
    train_size=0.90,
    stratify=labels_array.ravel(),
    random_state=42,
)
# normalize() rescales each row independently (unit L2 norm by default), so it
# has no fitted state and can be applied to the two partitions separately.
train_feat = normalize(train_feat)
test_feat = normalize(test_feat)
#KNN (K-NEAREST NEIGHBOUR) CLASSIFICATION
# Fit a 5-nearest-neighbour classifier (k-d tree search, n_jobs=-1 to use all
# available processors) and score it on the held-out partition.
KNN = KNeighborsClassifier(n_neighbors=5, algorithm="kd_tree", n_jobs=-1)
KNN.fit(train_feat, train_lab.ravel())
KNN_predicted_test_lab = KNN.predict(test_feat)

# The label arrays are column vectors; flatten once for the metric calls.
true_test_lab = test_lab.ravel()

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below cannot
# fail when one class happens to be absent from the test labels/predictions.
trueNeg, falsePos, falseNeg, truePos = confusion_matrix(
    true_test_lab, KNN_predicted_test_lab, labels=[0, 1]
).ravel()

accuracy = accuracy_score(true_test_lab, KNN_predicted_test_lab)
# Spelling of `precison` (and of its printed label) is kept as in the original
# so the recorded output and any later cell that reads the name still match.
precison = precision_score(true_test_lab, KNN_predicted_test_lab)
recall = recall_score(true_test_lab, KNN_predicted_test_lab)
# Stored as `f1`, not `f1_score`: rebinding `f1_score` would replace the imported
# sklearn function with a number, so re-running this cell (or scoring another
# model afterwards) would raise a "not callable" TypeError.
f1 = f1_score(true_test_lab, KNN_predicted_test_lab)

print("Confusion Matrix of KNN")
print("True Negative = ",trueNeg," || False Positive = ",falsePos)
print("False Negative = ",falseNeg," || True Positive = ",truePos)
print(" ")
print("SCORES VIA METRICS --")
print("Accuracy ==>",accuracy)
print("Precison ==>",precison)
print("Recall ==>",recall)
print("F1_Score ==>",f1)
Confusion Matrix of KNN True Negative = 28423 || False Positive = 4 False Negative = 19 || True Positive = 35 SCORES VIA METRICS -- Accuracy ==> 0.9991924440855307 Precison ==> 0.8974358974358975 Recall ==> 0.6481481481481481 F1_Score ==> 0.7526881720430108
Accuracy = 99.92%
Precision = 89.74%
Recall = 64.81%
F1_Score = 75.27%